1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.apache.lucene.util.automaton;
31
32 import org.apache.lucene.util.ArrayUtil;
33 import org.apache.lucene.util.BytesRef;
34 import org.apache.lucene.util.BytesRefBuilder;
35 import org.apache.lucene.util.IntsRef;
36 import org.apache.lucene.util.IntsRefBuilder;
37 import org.apache.lucene.util.RamUsageEstimator;
38
39 import java.util.ArrayList;
40 import java.util.Arrays;
41 import java.util.BitSet;
42 import java.util.Collection;
43 import java.util.HashMap;
44 import java.util.HashSet;
45 import java.util.LinkedList;
46 import java.util.List;
47 import java.util.Map;
48 import java.util.Set;
49
50
51
52
53
54
55 final public class Operations {
56
57
58
59 public static final int DEFAULT_MAX_DETERMINIZED_STATES = 10000;
60
61 private Operations() {}
62
63
64
65
66
67
68
69 static public Automaton concatenate(Automaton a1, Automaton a2) {
70 return concatenate(Arrays.asList(a1, a2));
71 }
72
73
74
75
76
77
78
79 static public Automaton concatenate(List<Automaton> l) {
80 Automaton result = new Automaton();
81
82
83 for(Automaton a : l) {
84 if (a.getNumStates() == 0) {
85 result.finishState();
86 return result;
87 }
88 int numStates = a.getNumStates();
89 for(int s=0;s<numStates;s++) {
90 result.createState();
91 }
92 }
93
94
95
96 int stateOffset = 0;
97 Transition t = new Transition();
98 for(int i=0;i<l.size();i++) {
99 Automaton a = l.get(i);
100 int numStates = a.getNumStates();
101
102 Automaton nextA = (i == l.size()-1) ? null : l.get(i+1);
103
104 for(int s=0;s<numStates;s++) {
105 int numTransitions = a.initTransition(s, t);
106 for(int j=0;j<numTransitions;j++) {
107 a.getNextTransition(t);
108 result.addTransition(stateOffset + s, stateOffset + t.dest, t.min, t.max);
109 }
110
111 if (a.isAccept(s)) {
112 Automaton followA = nextA;
113 int followOffset = stateOffset;
114 int upto = i+1;
115 while (true) {
116 if (followA != null) {
117
118 numTransitions = followA.initTransition(0, t);
119 for(int j=0;j<numTransitions;j++) {
120 followA.getNextTransition(t);
121 result.addTransition(stateOffset + s, followOffset + numStates + t.dest, t.min, t.max);
122 }
123 if (followA.isAccept(0)) {
124
125 followOffset += followA.getNumStates();
126 followA = (upto == l.size()-1) ? null : l.get(upto+1);
127 upto++;
128 } else {
129 break;
130 }
131 } else {
132 result.setAccept(stateOffset + s, true);
133 break;
134 }
135 }
136 }
137 }
138
139 stateOffset += numStates;
140 }
141
142 if (result.getNumStates() == 0) {
143 result.createState();
144 }
145
146 result.finishState();
147
148 return result;
149 }
150
151
152
153
154
155
156
157 static public Automaton optional(Automaton a) {
158 Automaton result = new Automaton();
159 result.createState();
160 result.setAccept(0, true);
161 if (a.getNumStates() > 0) {
162 result.copy(a);
163 result.addEpsilon(0, 1);
164 }
165 result.finishState();
166 return result;
167 }
168
169
170
171
172
173
174
175
176 static public Automaton repeat(Automaton a) {
177 if (a.getNumStates() == 0) {
178
179 return a;
180 }
181 Automaton.Builder builder = new Automaton.Builder();
182 builder.createState();
183 builder.setAccept(0, true);
184 builder.copy(a);
185
186 Transition t = new Transition();
187 int count = a.initTransition(0, t);
188 for(int i=0;i<count;i++) {
189 a.getNextTransition(t);
190 builder.addTransition(0, t.dest+1, t.min, t.max);
191 }
192
193 int numStates = a.getNumStates();
194 for(int s=0;s<numStates;s++) {
195 if (a.isAccept(s)) {
196 count = a.initTransition(0, t);
197 for(int i=0;i<count;i++) {
198 a.getNextTransition(t);
199 builder.addTransition(s+1, t.dest+1, t.min, t.max);
200 }
201 }
202 }
203
204 return builder.finish();
205 }
206
207
208
209
210
211
212
213 static public Automaton repeat(Automaton a, int count) {
214 if (count == 0) {
215 return repeat(a);
216 }
217 List<Automaton> as = new ArrayList<>();
218 while (count-- > 0) {
219 as.add(a);
220 }
221 as.add(repeat(a));
222 return concatenate(as);
223 }
224
225
226
227
228
229
230
231
232
233 static public Automaton repeat(Automaton a, int min, int max) {
234 if (min > max) {
235 return Automata.makeEmpty();
236 }
237
238 Automaton b;
239 if (min == 0) {
240 b = Automata.makeEmptyString();
241 } else if (min == 1) {
242 b = new Automaton();
243 b.copy(a);
244 } else {
245 List<Automaton> as = new ArrayList<>();
246 for(int i=0;i<min;i++) {
247 as.add(a);
248 }
249 b = concatenate(as);
250 }
251
252 Set<Integer> prevAcceptStates = toSet(b, 0);
253 Automaton.Builder builder = new Automaton.Builder();
254 builder.copy(b);
255 for(int i=min;i<max;i++) {
256 int numStates = builder.getNumStates();
257 builder.copy(a);
258 for(int s : prevAcceptStates) {
259 builder.addEpsilon(s, numStates);
260 }
261 prevAcceptStates = toSet(a, numStates);
262 }
263
264 return builder.finish();
265 }
266
267 private static Set<Integer> toSet(Automaton a, int offset) {
268 int numStates = a.getNumStates();
269 BitSet isAccept = a.getAcceptStates();
270 Set<Integer> result = new HashSet<Integer>();
271 int upto = 0;
272 while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) {
273 result.add(offset+upto);
274 upto++;
275 }
276
277 return result;
278 }
279
280
281
282
283
284
285
286
287
288
289
290 static public Automaton complement(Automaton a, int maxDeterminizedStates) {
291 a = totalize(determinize(a, maxDeterminizedStates));
292 int numStates = a.getNumStates();
293 for (int p=0;p<numStates;p++) {
294 a.setAccept(p, !a.isAccept(p));
295 }
296 return removeDeadStates(a);
297 }
298
299
300
301
302
303
304
305
306
307
308 static public Automaton minus(Automaton a1, Automaton a2, int maxDeterminizedStates) {
309 if (Operations.isEmpty(a1) || a1 == a2) {
310 return Automata.makeEmpty();
311 }
312 if (Operations.isEmpty(a2)) {
313 return a1;
314 }
315 return intersection(a1, complement(a2, maxDeterminizedStates));
316 }
317
318
319
320
321
322
323
324 static public Automaton intersection(Automaton a1, Automaton a2) {
325 if (a1 == a2) {
326 return a1;
327 }
328 if (a1.getNumStates() == 0) {
329 return a1;
330 }
331 if (a2.getNumStates() == 0) {
332 return a2;
333 }
334 Transition[][] transitions1 = a1.getSortedTransitions();
335 Transition[][] transitions2 = a2.getSortedTransitions();
336 Automaton c = new Automaton();
337 c.createState();
338 LinkedList<StatePair> worklist = new LinkedList<>();
339 HashMap<StatePair,StatePair> newstates = new HashMap<>();
340 StatePair p = new StatePair(0, 0, 0);
341 worklist.add(p);
342 newstates.put(p, p);
343 while (worklist.size() > 0) {
344 p = worklist.removeFirst();
345 c.setAccept(p.s, a1.isAccept(p.s1) && a2.isAccept(p.s2));
346 Transition[] t1 = transitions1[p.s1];
347 Transition[] t2 = transitions2[p.s2];
348 for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
349 while (b2 < t2.length && t2[b2].max < t1[n1].min)
350 b2++;
351 for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++)
352 if (t2[n2].max >= t1[n1].min) {
353 StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
354 StatePair r = newstates.get(q);
355 if (r == null) {
356 q.s = c.createState();
357 worklist.add(q);
358 newstates.put(q, q);
359 r = q;
360 }
361 int min = t1[n1].min > t2[n2].min ? t1[n1].min : t2[n2].min;
362 int max = t1[n1].max < t2[n2].max ? t1[n1].max : t2[n2].max;
363 c.addTransition(p.s, r.s, min, max);
364 }
365 }
366 }
367 c.finishState();
368
369 return removeDeadStates(c);
370 }
371
372
373
374
375
376
377 public static boolean sameLanguage(Automaton a1, Automaton a2) {
378 if (a1 == a2) {
379 return true;
380 }
381 return subsetOf(a2, a1) && subsetOf(a1, a2);
382 }
383
384
385
386
387
388 public static boolean hasDeadStates(Automaton a) {
389 BitSet liveStates = getLiveStates(a);
390 int numLive = liveStates.cardinality();
391 int numStates = a.getNumStates();
392 assert numLive <= numStates: "numLive=" + numLive + " numStates=" + numStates + " " + liveStates;
393 return numLive < numStates;
394 }
395
396
397
398 public static boolean hasDeadStatesFromInitial(Automaton a) {
399 BitSet reachableFromInitial = getLiveStatesFromInitial(a);
400 BitSet reachableFromAccept = getLiveStatesToAccept(a);
401 reachableFromInitial.andNot(reachableFromAccept);
402 return reachableFromInitial.isEmpty() == false;
403 }
404
405
406
407 public static boolean hasDeadStatesToAccept(Automaton a) {
408 BitSet reachableFromInitial = getLiveStatesFromInitial(a);
409 BitSet reachableFromAccept = getLiveStatesToAccept(a);
410 reachableFromAccept.andNot(reachableFromInitial);
411 return reachableFromAccept.isEmpty() == false;
412 }
413
414
415
416
417
418
419
420
421 public static boolean subsetOf(Automaton a1, Automaton a2) {
422 if (a1.isDeterministic() == false) {
423 throw new IllegalArgumentException("a1 must be deterministic");
424 }
425 if (a2.isDeterministic() == false) {
426 throw new IllegalArgumentException("a2 must be deterministic");
427 }
428 assert hasDeadStatesFromInitial(a1) == false;
429 assert hasDeadStatesFromInitial(a2) == false;
430 if (a1.getNumStates() == 0) {
431
432 return true;
433 } else if (a2.getNumStates() == 0) {
434 return isEmpty(a1);
435 }
436
437
438 Transition[][] transitions1 = a1.getSortedTransitions();
439 Transition[][] transitions2 = a2.getSortedTransitions();
440 LinkedList<StatePair> worklist = new LinkedList<>();
441 HashSet<StatePair> visited = new HashSet<>();
442 StatePair p = new StatePair(0, 0);
443 worklist.add(p);
444 visited.add(p);
445 while (worklist.size() > 0) {
446 p = worklist.removeFirst();
447 if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) {
448 return false;
449 }
450 Transition[] t1 = transitions1[p.s1];
451 Transition[] t2 = transitions2[p.s2];
452 for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
453 while (b2 < t2.length && t2[b2].max < t1[n1].min) {
454 b2++;
455 }
456 int min1 = t1[n1].min, max1 = t1[n1].max;
457
458 for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) {
459 if (t2[n2].min > min1) {
460 return false;
461 }
462 if (t2[n2].max < Character.MAX_CODE_POINT) {
463 min1 = t2[n2].max + 1;
464 } else {
465 min1 = Character.MAX_CODE_POINT;
466 max1 = Character.MIN_CODE_POINT;
467 }
468 StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
469 if (!visited.contains(q)) {
470 worklist.add(q);
471 visited.add(q);
472 }
473 }
474 if (min1 <= max1) {
475 return false;
476 }
477 }
478 }
479 return true;
480 }
481
482
483
484
485
486
487
488 public static Automaton union(Automaton a1, Automaton a2) {
489 return union(Arrays.asList(a1, a2));
490 }
491
492
493
494
495
496
497
498 public static Automaton union(Collection<Automaton> l) {
499 Automaton result = new Automaton();
500
501
502 result.createState();
503
504
505 for(Automaton a : l) {
506 result.copy(a);
507 }
508
509
510 int stateOffset = 1;
511 for(Automaton a : l) {
512 if (a.getNumStates() == 0) {
513 continue;
514 }
515 result.addEpsilon(0, stateOffset);
516 stateOffset += a.getNumStates();
517 }
518
519 result.finishState();
520
521 return removeDeadStates(result);
522 }
523
524
525 private final static class TransitionList {
526
527 int[] transitions = new int[3];
528 int next;
529
530 public void add(Transition t) {
531 if (transitions.length < next+3) {
532 transitions = ArrayUtil.grow(transitions, next+3);
533 }
534 transitions[next] = t.dest;
535 transitions[next+1] = t.min;
536 transitions[next+2] = t.max;
537 next += 3;
538 }
539 }
540
541
542
543 private final static class PointTransitions implements Comparable<PointTransitions> {
544 int point;
545 final TransitionList ends = new TransitionList();
546 final TransitionList starts = new TransitionList();
547
548 @Override
549 public int compareTo(PointTransitions other) {
550 return point - other.point;
551 }
552
553 public void reset(int point) {
554 this.point = point;
555 ends.next = 0;
556 starts.next = 0;
557 }
558
559 @Override
560 public boolean equals(Object other) {
561 return ((PointTransitions) other).point == point;
562 }
563
564 @Override
565 public int hashCode() {
566 return point;
567 }
568 }
569
570 private final static class PointTransitionSet {
571 int count;
572 PointTransitions[] points = new PointTransitions[5];
573
574 private final static int HASHMAP_CUTOVER = 30;
575 private final HashMap<Integer,PointTransitions> map = new HashMap<>();
576 private boolean useHash = false;
577
578 private PointTransitions next(int point) {
579
580 if (count == points.length) {
581 final PointTransitions[] newArray = new PointTransitions[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
582 System.arraycopy(points, 0, newArray, 0, count);
583 points = newArray;
584 }
585 PointTransitions points0 = points[count];
586 if (points0 == null) {
587 points0 = points[count] = new PointTransitions();
588 }
589 points0.reset(point);
590 count++;
591 return points0;
592 }
593
594 private PointTransitions find(int point) {
595 if (useHash) {
596 final Integer pi = point;
597 PointTransitions p = map.get(pi);
598 if (p == null) {
599 p = next(point);
600 map.put(pi, p);
601 }
602 return p;
603 } else {
604 for(int i=0;i<count;i++) {
605 if (points[i].point == point) {
606 return points[i];
607 }
608 }
609
610 final PointTransitions p = next(point);
611 if (count == HASHMAP_CUTOVER) {
612
613 assert map.size() == 0;
614 for(int i=0;i<count;i++) {
615 map.put(points[i].point, points[i]);
616 }
617 useHash = true;
618 }
619 return p;
620 }
621 }
622
623 public void reset() {
624 if (useHash) {
625 map.clear();
626 useHash = false;
627 }
628 count = 0;
629 }
630
631 public void sort() {
632
633 if (count > 1) ArrayUtil.timSort(points, 0, count);
634 }
635
636 public void add(Transition t) {
637 find(t.min).starts.add(t);
638 find(1+t.max).ends.add(t);
639 }
640
641 @Override
642 public String toString() {
643 StringBuilder s = new StringBuilder();
644 for(int i=0;i<count;i++) {
645 if (i > 0) {
646 s.append(' ');
647 }
648 s.append(points[i].point).append(':').append(points[i].starts.next/3).append(',').append(points[i].ends.next/3);
649 }
650 return s.toString();
651 }
652 }
653
654
655
656
657
658
659
660
661
662
663
664
665
666 public static Automaton determinize(Automaton a, int maxDeterminizedStates) {
667 if (a.isDeterministic()) {
668
669 return a;
670 }
671 if (a.getNumStates() <= 1) {
672
673 return a;
674 }
675
676
677 Automaton.Builder b = new Automaton.Builder();
678
679
680
681
682 SortedIntSet.FrozenIntSet initialset = new SortedIntSet.FrozenIntSet(0, 0);
683
684
685 b.createState();
686
687 LinkedList<SortedIntSet.FrozenIntSet> worklist = new LinkedList<>();
688 Map<SortedIntSet.FrozenIntSet,Integer> newstate = new HashMap<>();
689
690 worklist.add(initialset);
691
692 b.setAccept(0, a.isAccept(0));
693 newstate.put(initialset, 0);
694
695
696 final PointTransitionSet points = new PointTransitionSet();
697
698
699 final SortedIntSet statesSet = new SortedIntSet(5);
700
701 Transition t = new Transition();
702
703 while (worklist.size() > 0) {
704 SortedIntSet.FrozenIntSet s = worklist.removeFirst();
705
706
707
708 for(int i=0;i<s.values.length;i++) {
709 final int s0 = s.values[i];
710 int numTransitions = a.getNumTransitions(s0);
711 a.initTransition(s0, t);
712 for(int j=0;j<numTransitions;j++) {
713 a.getNextTransition(t);
714 points.add(t);
715 }
716 }
717
718 if (points.count == 0) {
719
720 continue;
721 }
722
723 points.sort();
724
725 int lastPoint = -1;
726 int accCount = 0;
727
728 final int r = s.state;
729
730 for(int i=0;i<points.count;i++) {
731
732 final int point = points.points[i].point;
733
734 if (statesSet.upto > 0) {
735 assert lastPoint != -1;
736
737 statesSet.computeHash();
738
739 Integer q = newstate.get(statesSet);
740 if (q == null) {
741 q = b.createState();
742 if (q >= maxDeterminizedStates) {
743 throw new TooComplexToDeterminizeException(a, maxDeterminizedStates);
744 }
745 final SortedIntSet.FrozenIntSet p = statesSet.freeze(q);
746
747 worklist.add(p);
748 b.setAccept(q, accCount > 0);
749 newstate.put(p, q);
750 } else {
751 assert (accCount > 0 ? true:false) == b.isAccept(q): "accCount=" + accCount + " vs existing accept=" +
752 b.isAccept(q) + " states=" + statesSet;
753 }
754
755
756
757 b.addTransition(r, q, lastPoint, point-1);
758 }
759
760
761
762 int[] transitions = points.points[i].ends.transitions;
763 int limit = points.points[i].ends.next;
764 for(int j=0;j<limit;j+=3) {
765 int dest = transitions[j];
766 statesSet.decr(dest);
767 accCount -= a.isAccept(dest) ? 1:0;
768 }
769 points.points[i].ends.next = 0;
770
771
772
773 transitions = points.points[i].starts.transitions;
774 limit = points.points[i].starts.next;
775 for(int j=0;j<limit;j+=3) {
776 int dest = transitions[j];
777 statesSet.incr(dest);
778 accCount += a.isAccept(dest) ? 1:0;
779 }
780 lastPoint = point;
781 points.points[i].starts.next = 0;
782 }
783 points.reset();
784 assert statesSet.upto == 0: "upto=" + statesSet.upto;
785 }
786
787 Automaton result = b.finish();
788 assert result.isDeterministic();
789 return result;
790 }
791
792
793
794
795 public static boolean isEmpty(Automaton a) {
796 if (a.getNumStates() == 0) {
797
798 return true;
799 }
800 if (a.isAccept(0) == false && a.getNumTransitions(0) == 0) {
801
802 return true;
803 }
804 if (a.isAccept(0) == true) {
805
806 return false;
807 }
808
809 LinkedList<Integer> workList = new LinkedList<>();
810 BitSet seen = new BitSet(a.getNumStates());
811 workList.add(0);
812 seen.set(0);
813
814 Transition t = new Transition();
815 while (workList.isEmpty() == false) {
816 int state = workList.removeFirst();
817 if (a.isAccept(state)) {
818 return false;
819 }
820 int count = a.initTransition(state, t);
821 for(int i=0;i<count;i++) {
822 a.getNextTransition(t);
823 if (seen.get(t.dest) == false) {
824 workList.add(t.dest);
825 seen.set(t.dest);
826 }
827 }
828 }
829
830 return true;
831 }
832
833
834
835
836 public static boolean isTotal(Automaton a) {
837 return isTotal(a, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
838 }
839
840
841
842
843
844 public static boolean isTotal(Automaton a, int minAlphabet, int maxAlphabet) {
845 if (a.isAccept(0) && a.getNumTransitions(0) == 1) {
846 Transition t = new Transition();
847 a.getTransition(0, 0, t);
848 return t.dest == 0
849 && t.min == minAlphabet
850 && t.max == maxAlphabet;
851 }
852 return false;
853 }
854
855
856
857
858
859
860
861
862 public static boolean run(Automaton a, String s) {
863 assert a.isDeterministic();
864 int state = 0;
865 for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
866 int nextState = a.step(state, cp = s.codePointAt(i));
867 if (nextState == -1) {
868 return false;
869 }
870 state = nextState;
871 }
872 return a.isAccept(state);
873 }
874
875
876
877
878
879
880
881
882 public static boolean run(Automaton a, IntsRef s) {
883 assert a.isDeterministic();
884 int state = 0;
885 for (int i=0;i<s.length;i++) {
886 int nextState = a.step(state, s.ints[s.offset+i]);
887 if (nextState == -1) {
888 return false;
889 }
890 state = nextState;
891 }
892 return a.isAccept(state);
893 }
894
895
896
897
898
899 private static BitSet getLiveStates(Automaton a) {
900 BitSet live = getLiveStatesFromInitial(a);
901 live.and(getLiveStatesToAccept(a));
902 return live;
903 }
904
905
906 private static BitSet getLiveStatesFromInitial(Automaton a) {
907 int numStates = a.getNumStates();
908 BitSet live = new BitSet(numStates);
909 if (numStates == 0) {
910 return live;
911 }
912 LinkedList<Integer> workList = new LinkedList<>();
913 live.set(0);
914 workList.add(0);
915
916 Transition t = new Transition();
917 while (workList.isEmpty() == false) {
918 int s = workList.removeFirst();
919 int count = a.initTransition(s, t);
920 for(int i=0;i<count;i++) {
921 a.getNextTransition(t);
922 if (live.get(t.dest) == false) {
923 live.set(t.dest);
924 workList.add(t.dest);
925 }
926 }
927 }
928
929 return live;
930 }
931
932
933 private static BitSet getLiveStatesToAccept(Automaton a) {
934 Automaton.Builder builder = new Automaton.Builder();
935
936
937 Transition t = new Transition();
938 int numStates = a.getNumStates();
939 for(int s=0;s<numStates;s++) {
940 builder.createState();
941 }
942 for(int s=0;s<numStates;s++) {
943 int count = a.initTransition(s, t);
944 for(int i=0;i<count;i++) {
945 a.getNextTransition(t);
946 builder.addTransition(t.dest, s, t.min, t.max);
947 }
948 }
949 Automaton a2 = builder.finish();
950
951 LinkedList<Integer> workList = new LinkedList<>();
952 BitSet live = new BitSet(numStates);
953 BitSet acceptBits = a.getAcceptStates();
954 int s = 0;
955 while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) {
956 live.set(s);
957 workList.add(s);
958 s++;
959 }
960
961 while (workList.isEmpty() == false) {
962 s = workList.removeFirst();
963 int count = a2.initTransition(s, t);
964 for(int i=0;i<count;i++) {
965 a2.getNextTransition(t);
966 if (live.get(t.dest) == false) {
967 live.set(t.dest);
968 workList.add(t.dest);
969 }
970 }
971 }
972
973 return live;
974 }
975
976
977
978
979
980 public static Automaton removeDeadStates(Automaton a) {
981 int numStates = a.getNumStates();
982 BitSet liveSet = getLiveStates(a);
983
984 int[] map = new int[numStates];
985
986 Automaton result = new Automaton();
987
988 for(int i=0;i<numStates;i++) {
989 if (liveSet.get(i)) {
990 map[i] = result.createState();
991 result.setAccept(map[i], a.isAccept(i));
992 }
993 }
994
995 Transition t = new Transition();
996
997 for (int i=0;i<numStates;i++) {
998 if (liveSet.get(i)) {
999 int numTransitions = a.initTransition(i, t);
1000
1001 for(int j=0;j<numTransitions;j++) {
1002 a.getNextTransition(t);
1003 if (liveSet.get(t.dest)) {
1004 result.addTransition(map[i], map[t.dest], t.min, t.max);
1005 }
1006 }
1007 }
1008 }
1009
1010 result.finishState();
1011 assert hasDeadStates(result) == false;
1012 return result;
1013 }
1014
1015
1016
1017
1018
1019 static int findIndex(int c, int[] points) {
1020 int a = 0;
1021 int b = points.length;
1022 while (b - a > 1) {
1023 int d = (a + b) >>> 1;
1024 if (points[d] > c) b = d;
1025 else if (points[d] < c) a = d;
1026 else return d;
1027 }
1028 return a;
1029 }
1030
1031
1032
1033
1034
1035 public static boolean isFinite(Automaton a) {
1036 if (a.getNumStates() == 0) {
1037 return true;
1038 }
1039 return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()));
1040 }
1041
1042
1043
1044
1045
1046
1047
1048 private static boolean isFinite(Transition scratch, Automaton a, int state, BitSet path, BitSet visited) {
1049 path.set(state);
1050 int numTransitions = a.initTransition(state, scratch);
1051 for(int t=0;t<numTransitions;t++) {
1052 a.getTransition(state, t, scratch);
1053 if (path.get(scratch.dest) || (!visited.get(scratch.dest) && !isFinite(scratch, a, scratch.dest, path, visited))) {
1054 return false;
1055 }
1056 }
1057 path.clear(state);
1058 visited.set(state);
1059 return true;
1060 }
1061
1062
1063
1064
1065
1066
1067
1068 public static String getCommonPrefix(Automaton a) {
1069 if (a.isDeterministic() == false) {
1070 throw new IllegalArgumentException("input automaton must be deterministic");
1071 }
1072 StringBuilder b = new StringBuilder();
1073 HashSet<Integer> visited = new HashSet<>();
1074 int s = 0;
1075 boolean done;
1076 Transition t = new Transition();
1077 do {
1078 done = true;
1079 visited.add(s);
1080 if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
1081 a.getTransition(s, 0, t);
1082 if (t.min == t.max && !visited.contains(t.dest)) {
1083 b.appendCodePoint(t.min);
1084 s = t.dest;
1085 done = false;
1086 }
1087 }
1088 } while (!done);
1089
1090 return b.toString();
1091 }
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102 public static BytesRef getCommonPrefixBytesRef(Automaton a) {
1103 BytesRefBuilder builder = new BytesRefBuilder();
1104 HashSet<Integer> visited = new HashSet<>();
1105 int s = 0;
1106 boolean done;
1107 Transition t = new Transition();
1108 do {
1109 done = true;
1110 visited.add(s);
1111 if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
1112 a.getTransition(s, 0, t);
1113 if (t.min == t.max && !visited.contains(t.dest)) {
1114 builder.append((byte) t.min);
1115 s = t.dest;
1116 done = false;
1117 }
1118 }
1119 } while (!done);
1120
1121 return builder.get();
1122 }
1123
1124
1125
1126 public static IntsRef getSingleton(Automaton a) {
1127 if (a.isDeterministic() == false) {
1128 throw new IllegalArgumentException("input automaton must be deterministic");
1129 }
1130 IntsRefBuilder builder = new IntsRefBuilder();
1131 HashSet<Integer> visited = new HashSet<>();
1132 int s = 0;
1133 Transition t = new Transition();
1134 while (true) {
1135 visited.add(s);
1136 if (a.isAccept(s) == false) {
1137 if (a.getNumTransitions(s) == 1) {
1138 a.getTransition(s, 0, t);
1139 if (t.min == t.max && !visited.contains(t.dest)) {
1140 builder.append(t.min);
1141 s = t.dest;
1142 continue;
1143 }
1144 }
1145 } else if (a.getNumTransitions(s) == 0) {
1146 return builder.get();
1147 }
1148
1149
1150 return null;
1151 }
1152 }
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163 public static BytesRef getCommonSuffixBytesRef(Automaton a, int maxDeterminizedStates) {
1164
1165 Automaton r = Operations.determinize(reverse(a), maxDeterminizedStates);
1166 BytesRef ref = getCommonPrefixBytesRef(r);
1167 reverseBytes(ref);
1168 return ref;
1169 }
1170
1171 private static void reverseBytes(BytesRef ref) {
1172 if (ref.length <= 1) return;
1173 int num = ref.length >> 1;
1174 for (int i = ref.offset; i < ( ref.offset + num ); i++) {
1175 byte b = ref.bytes[i];
1176 ref.bytes[i] = ref.bytes[ref.offset * 2 + ref.length - i - 1];
1177 ref.bytes[ref.offset * 2 + ref.length - i - 1] = b;
1178 }
1179 }
1180
1181
1182 public static Automaton reverse(Automaton a) {
1183 return reverse(a, null);
1184 }
1185
1186
1187 static Automaton reverse(Automaton a, Set<Integer> initialStates) {
1188
1189 if (Operations.isEmpty(a)) {
1190 return new Automaton();
1191 }
1192
1193 int numStates = a.getNumStates();
1194
1195
1196 Automaton.Builder builder = new Automaton.Builder();
1197
1198
1199 builder.createState();
1200
1201 for(int s=0;s<numStates;s++) {
1202 builder.createState();
1203 }
1204
1205
1206 builder.setAccept(1, true);
1207
1208 Transition t = new Transition();
1209 for (int s=0;s<numStates;s++) {
1210 int numTransitions = a.getNumTransitions(s);
1211 a.initTransition(s, t);
1212 for(int i=0;i<numTransitions;i++) {
1213 a.getNextTransition(t);
1214 builder.addTransition(t.dest+1, s+1, t.min, t.max);
1215 }
1216 }
1217
1218 Automaton result = builder.finish();
1219
1220 int s = 0;
1221 BitSet acceptStates = a.getAcceptStates();
1222 while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
1223 result.addEpsilon(0, s+1);
1224 if (initialStates != null) {
1225 initialStates.add(s+1);
1226 }
1227 s++;
1228 }
1229
1230 result.finishState();
1231
1232 return result;
1233 }
1234
1235
1236
1237
1238 static Automaton totalize(Automaton a) {
1239 Automaton result = new Automaton();
1240 int numStates = a.getNumStates();
1241 for(int i=0;i<numStates;i++) {
1242 result.createState();
1243 result.setAccept(i, a.isAccept(i));
1244 }
1245
1246 int deadState = result.createState();
1247 result.addTransition(deadState, deadState, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
1248
1249 Transition t = new Transition();
1250 for(int i=0;i<numStates;i++) {
1251 int maxi = Character.MIN_CODE_POINT;
1252 int count = a.initTransition(i, t);
1253 for(int j=0;j<count;j++) {
1254 a.getNextTransition(t);
1255 result.addTransition(i, t.dest, t.min, t.max);
1256 if (t.min > maxi) {
1257 result.addTransition(i, deadState, maxi, t.min-1);
1258 }
1259 if (t.max + 1 > maxi) {
1260 maxi = t.max + 1;
1261 }
1262 }
1263
1264 if (maxi <= Character.MAX_CODE_POINT) {
1265 result.addTransition(i, deadState, maxi, Character.MAX_CODE_POINT);
1266 }
1267 }
1268
1269 result.finishState();
1270 return result;
1271 }
1272
1273
1274
1275
1276
1277
1278 public static int[] topoSortStates(Automaton a) {
1279 if (a.getNumStates() == 0) {
1280 return new int[0];
1281 }
1282 int numStates = a.getNumStates();
1283 int[] states = new int[numStates];
1284 final BitSet visited = new BitSet(numStates);
1285 int upto = topoSortStatesRecurse(a, visited, states, 0, 0);
1286
1287 if (upto < states.length) {
1288
1289 int[] newStates = new int[upto];
1290 System.arraycopy(states, 0, newStates, 0, upto);
1291 states = newStates;
1292 }
1293
1294
1295 for(int i=0;i<states.length/2;i++) {
1296 int s = states[i];
1297 states[i] = states[states.length-1-i];
1298 states[states.length-1-i] = s;
1299 }
1300
1301 return states;
1302 }
1303
1304 private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int upto, int state) {
1305 Transition t = new Transition();
1306 int count = a.initTransition(state, t);
1307 for (int i=0;i<count;i++) {
1308 a.getNextTransition(t);
1309 if (!visited.get(t.dest)) {
1310 visited.set(t.dest);
1311 upto = topoSortStatesRecurse(a, visited, states, upto, t.dest);
1312 }
1313 }
1314 states[upto] = state;
1315 upto++;
1316 return upto;
1317 }
1318 }